{smcl}
{com}{sf}{ul off}{txt}{.-}
      name:  {res}<unnamed>
       {txt}log:  {res}/Users/fcoell/Dropbox/PATSTAT/logs/../logs/Appendix_firms_patents_distr.smcl
  {txt}log type:  {res}smcl
 {txt}opened on:  {res}15 Apr 2020, 16:22:44
{txt}
{com}. di "******* Share of firms and patents across countries and industries *******"
{res}******* Share of firms and patents across countries and industries *******
{txt}
{com}. 
. * -----------------------------------------------------------------------------
. * Appendix Figure 8
. * Distribution of granted patents across countries - Initial vs final sample
. * -----------------------------------------------------------------------------
. // Countries share of granted patents (top 10) - Final sample
. /*      Note: if a patent is owned by firms in multiple countries, we count the
>         patent once in each country. Table 13 counts unique patents, so total is lower. */
. use $tmp/finsample, clear
{txt}
{com}. gen top10 = (headq=="US" | headq=="JP" | headq=="DE" | headq=="GB" | headq=="FR" ///
>         | headq=="IT" | headq=="CA" | headq=="MX" | headq=="BR" | headq=="ES") 
{txt}
{com}.         // 10 major economies in 2000 (nominal gdp)
. collapse (max) granted (min) y, by(docdb_family_id headq top10)
{txt}
{com}. keep if y>=1992 & y<=2000 // sample period
{txt}(2,714,011 observations deleted)

{com}. collapse (sum) granted, by(headq top10)
{txt}
{com}. egen tot_granted = total(granted)
{txt}
{com}. gen sh_granted = granted / tot_granted
{txt}
{com}. egen top10_sh_granted = total(sh_granted) if top10 == 1
{txt}(53 missing values generated)

{com}. tabstat granted tot_granted sh_granted top10_sh_granted if top10 == 1, ///
>         by(headq) nototal

{txt}Summary statistics: mean
  by categories of: headq 

{ralign 5:headq} {...}
{c |}{...}
   granted  tot_gr~d  sh_gra~d  top10_~d
{hline 6}{c +}{hline 40}
{ralign 5:BR} {...}
{c |}{...}
 {res}      199    637949  .0003119  .7540383
{txt}{ralign 5:CA} {...}
{c |}{...}
 {res}     8296    637949  .0130042  .7540383
{txt}{ralign 5:DE} {...}
{c |}{...}
 {res}    32439    637949  .0508489  .7540383
{txt}{ralign 5:ES} {...}
{c |}{...}
 {res}     2330    637949  .0036523  .7540383
{txt}{ralign 5:FR} {...}
{c |}{...}
 {res}    21085    637949  .0330512  .7540383
{txt}{ralign 5:GB} {...}
{c |}{...}
 {res}     9862    637949  .0154589  .7540383
{txt}{ralign 5:IT} {...}
{c |}{...}
 {res}    15580    637949   .024422  .7540383
{txt}{ralign 5:JP} {...}
{c |}{...}
 {res}   250979    637949  .3934155  .7540383
{txt}{ralign 5:MX} {...}
{c |}{...}
 {res}       57    637949  .0000893  .7540383
{txt}{ralign 5:US} {...}
{c |}{...}
 {res}   140211    637949   .219784  .7540383
{txt}{hline 6}{c BT}{hline 40}

{com}. drop top10_sh_granted
{txt}
{com}. save $tmp/sh_granted_final, replace
{txt}file /tmp/sh_granted_final.dta saved

{com}. 
. // Countries share of granted patents (top 10) - Initial sample
. use $tmp/initsample, clear
{txt}
{com}. gen top10 = (headq=="US" | headq=="JP" | headq=="DE" | headq=="GB" | headq=="FR" ///
>         | headq=="IT" | headq=="CA" | headq=="MX" | headq=="BR" | headq=="ES") 
{txt}
{com}.         // 10 major economies in 2000 (nominal gdp)
. collapse (max) granted (min) y, by(docdb_family_id headq top10)
{txt}
{com}. keep if y>=1992 & y<=2000 // sample period
{txt}(13,348,345 observations deleted)

{com}. collapse (sum) granted, by(headq top10)
{txt}
{com}. egen tot_granted = total(granted)
{txt}
{com}. gen sh_granted = granted / tot_granted
{txt}
{com}. egen top10_sh_granted = total(sh_granted) if top10 == 1
{txt}(94 missing values generated)

{com}. tabstat granted tot_granted sh_granted top10_sh_granted if top10 == 1, ///
>         by(headq) nototal

{txt}Summary statistics: mean
  by categories of: headq 

{ralign 5:headq} {...}
{c |}{...}
   granted  tot_gr~d  sh_gra~d  top10_~d
{hline 6}{c +}{hline 40}
{ralign 5:BR} {...}
{c |}{...}
 {res}     1368   3046545   .000449  .8150653
{txt}{ralign 5:CA} {...}
{c |}{...}
 {res}    35210   3046545  .0115574  .8150653
{txt}{ralign 5:DE} {...}
{c |}{...}
 {res}   362525   3046545  .1189955  .8150653
{txt}{ralign 5:ES} {...}
{c |}{...}
 {res}    38056   3046545  .0124915  .8150653
{txt}{ralign 5:FR} {...}
{c |}{...}
 {res}   102339   3046545  .0335918  .8150653
{txt}{ralign 5:GB} {...}
{c |}{...}
 {res}    58429   3046545  .0191788  .8150653
{txt}{ralign 5:IT} {...}
{c |}{...}
 {res}    84729   3046545  .0278115  .8150653
{txt}{ralign 5:JP} {...}
{c |}{...}
 {res}  1045215   3046545  .3430821  .8150653
{txt}{ralign 5:MX} {...}
{c |}{...}
 {res}      676   3046545  .0002219  .8150653
{txt}{ralign 5:US} {...}
{c |}{...}
 {res}   754586   3046545  .2476858  .8150653
{txt}{hline 6}{c BT}{hline 40}

{com}. drop top10_sh_granted
{txt}
{com}. 
. // Plot
. foreach v of varlist granted sh_granted tot_granted {c -(}
{txt}  2{com}.         rename `v' `v'_init
{txt}  3{com}. {c )-}
{res}{txt}
{com}. merge 1:1 headq using $tmp/sh_granted_final
{res}
{txt}{col 5}Result{col 38}# of obs.
{col 5}{hline 41}
{col 5}not matched{col 30}{res}              41
{txt}{col 9}from master{col 30}{res}              41{txt}  (_merge==1)
{col 9}from using{col 30}{res}               0{txt}  (_merge==2)

{col 5}matched{col 30}{res}              63{txt}  (_merge==3)
{col 5}{hline 41}

{com}. twoway (scatter sh_granted sh_granted_init) (lfit sh_granted sh_granted_init), ///
>         scheme(lean1) legend(off) ytitle(Country share of patents - final sample) ///
>         xtitle(Country share of patents - initial sample)
{res}{txt}
{com}. graph export ../graph/ctry_patent_share.eps, replace
{txt}(file ../graph/ctry_patent_share.eps written in EPS format)

{com}. 
. reg sh_granted sh_granted_init

{txt}      Source {c |}       SS           df       MS      Number of obs   ={res}        63
{txt}{hline 13}{c +}{hline 34}   F(1, 61)        = {res}   838.96
{txt}       Model {c |} {res} .213038908         1  .213038908   {txt}Prob > F        ={res}    0.0000
{txt}    Residual {c |} {res} .015489937        61  .000253933   {txt}R-squared       ={res}    0.9322
{txt}{hline 13}{c +}{hline 34}   Adj R-squared   ={res}    0.9311
{txt}       Total {c |} {res} .228528844        62  .003685949   {txt}Root MSE        =   {res} .01594

{txt}{hline 16}{c TT}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{col 1}     sh_granted{col 17}{c |}      Coef.{col 29}   Std. Err.{col 41}      t{col 49}   P>|t|{col 57}     [95% Con{col 70}f. Interval]
{hline 16}{c +}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
sh_granted_init {c |}{col 17}{res}{space 2} 1.056913{col 29}{space 2} .0364896{col 40}{space 1}   28.96{col 49}{space 3}0.000{col 57}{space 4} .9839471{col 70}{space 3} 1.129878
{txt}{space 10}_cons {c |}{col 17}{res}{space 2}-.0008991{col 29}{space 2} .0020895{col 40}{space 1}   -0.43{col 49}{space 3}0.668{col 57}{space 4}-.0050773{col 70}{space 3} .0032791
{txt}{hline 16}{c BT}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{res}{txt}
{com}. 
. 
. * -----------------------------------------------------------------------------
. * Appendix Figure 10
. * Distribution of granted patents across industries - Initial vs final sample
. * -----------------------------------------------------------------------------
. // Industry share of granted patents - Final sample
. /*      Note: a patent is only counted once in each nace (2 digit) industry.
>         A patent owned by firms in different industries is counted once in each
>         industry */
. use $tmp/finsample, clear
{txt}
{com}. gen nace = int(nace2_1) // 2-digit nace
{txt}
{com}. collapse (max) granted (min) y, by(docdb_family_id nace)
{txt}
{com}. keep if y>=1992 & y<=2000 // sample period
{txt}(2,745,995 observations deleted)

{com}. collapse (sum) granted, by(nace)
{txt}
{com}. egen tot_granted = total(granted)
{txt}
{com}. gen sh_granted = granted / tot_granted
{txt}
{com}. tabstat granted tot_granted sh_granted, by(nace) nototal

{txt}Summary statistics: mean
  by categories of: nace 

{ralign 8:nace} {...}
{c |}{...}
   granted  tot_gr~d  sh_gra~d
{hline 9}{c +}{hline 30}
{ralign 8:10} {...}
{c |}{...}
 {res}     6014    646339  .0093047
{txt}{ralign 8:11} {...}
{c |}{...}
 {res}      410    646339  .0006343
{txt}{ralign 8:12} {...}
{c |}{...}
 {res}      249    646339  .0003852
{txt}{ralign 8:13} {...}
{c |}{...}
 {res}     2176    646339  .0033667
{txt}{ralign 8:14} {...}
{c |}{...}
 {res}      491    646339  .0007597
{txt}{ralign 8:15} {...}
{c |}{...}
 {res}      948    646339  .0014667
{txt}{ralign 8:16} {...}
{c |}{...}
 {res}      143    646339  .0002212
{txt}{ralign 8:17} {...}
{c |}{...}
 {res}     1513    646339  .0023409
{txt}{ralign 8:18} {...}
{c |}{...}
 {res}      854    646339  .0013213
{txt}{ralign 8:19} {...}
{c |}{...}
 {res}      684    646339  .0010583
{txt}{ralign 8:20} {...}
{c |}{...}
 {res}    57814    646339  .0894484
{txt}{ralign 8:21} {...}
{c |}{...}
 {res}    31900    646339  .0493549
{txt}{ralign 8:22} {...}
{c |}{...}
 {res}    19186    646339  .0296841
{txt}{ralign 8:23} {...}
{c |}{...}
 {res}    11866    646339  .0183588
{txt}{ralign 8:24} {...}
{c |}{...}
 {res}    13451    646339  .0208111
{txt}{ralign 8:25} {...}
{c |}{...}
 {res}    11287    646339   .017463
{txt}{ralign 8:26} {...}
{c |}{...}
 {res}   202341    646339  .3130571
{txt}{ralign 8:27} {...}
{c |}{...}
 {res}    34065    646339  .0527045
{txt}{ralign 8:28} {...}
{c |}{...}
 {res}   143605    646339  .2221822
{txt}{ralign 8:29} {...}
{c |}{...}
 {res}    65290    646339  .1010151
{txt}{ralign 8:30} {...}
{c |}{...}
 {res}     5094    646339  .0078813
{txt}{ralign 8:31} {...}
{c |}{...}
 {res}     3127    646339   .004838
{txt}{ralign 8:32} {...}
{c |}{...}
 {res}    33796    646339  .0522883
{txt}{ralign 8:62} {...}
{c |}{...}
 {res}       35    646339  .0000542
{txt}{hline 9}{c BT}{hline 30}

{com}. save $tmp/sh_granted_final, replace
{txt}file /tmp/sh_granted_final.dta saved

{com}. 
. // Industry share of granted patents - Initial sample
. use $tmp/initsample, clear
{txt}
{com}. gen nace = int(nace2_1) // 2-digit nace
{txt}
{com}. collapse (max) granted (min) y, by(docdb_family_id nace)
{txt}
{com}. keep if y>=1992 & y<=2000 // sample period
{txt}(13,699,496 observations deleted)

{com}. collapse (sum) granted, by(nace)
{txt}
{com}. egen tot_granted = total(granted)
{txt}
{com}. gen sh_granted = granted / tot_granted
{txt}
{com}. tabstat granted tot_granted sh_granted, by(nace) nototal

{txt}Summary statistics: mean
  by categories of: nace 

{ralign 8:nace} {...}
{c |}{...}
   granted  tot_gr~d  sh_gra~d
{hline 9}{c +}{hline 30}
{ralign 8:10} {...}
{c |}{...}
 {res}    29192   3133067  .0093174
{txt}{ralign 8:11} {...}
{c |}{...}
 {res}     1505   3133067  .0004804
{txt}{ralign 8:12} {...}
{c |}{...}
 {res}     2107   3133067  .0006725
{txt}{ralign 8:13} {...}
{c |}{...}
 {res}     5124   3133067  .0016355
{txt}{ralign 8:14} {...}
{c |}{...}
 {res}     5701   3133067  .0018196
{txt}{ralign 8:15} {...}
{c |}{...}
 {res}     6389   3133067  .0020392
{txt}{ralign 8:16} {...}
{c |}{...}
 {res}     1402   3133067  .0004475
{txt}{ralign 8:17} {...}
{c |}{...}
 {res}     5983   3133067  .0019096
{txt}{ralign 8:18} {...}
{c |}{...}
 {res}     8957   3133067  .0028589
{txt}{ralign 8:19} {...}
{c |}{...}
 {res}     2751   3133067  .0008781
{txt}{ralign 8:20} {...}
{c |}{...}
 {res}   277551   3133067  .0885876
{txt}{ralign 8:21} {...}
{c |}{...}
 {res}   165760   3133067  .0529066
{txt}{ralign 8:22} {...}
{c |}{...}
 {res}    62518   3133067  .0199542
{txt}{ralign 8:23} {...}
{c |}{...}
 {res}    29086   3133067  .0092836
{txt}{ralign 8:24} {...}
{c |}{...}
 {res}    58714   3133067  .0187401
{txt}{ralign 8:25} {...}
{c |}{...}
 {res}    62701   3133067  .0200127
{txt}{ralign 8:26} {...}
{c |}{...}
 {res}   975613   3133067  .3113923
{txt}{ralign 8:27} {...}
{c |}{...}
 {res}   167451   3133067  .0534464
{txt}{ralign 8:28} {...}
{c |}{...}
 {res}   661696   3133067  .2111975
{txt}{ralign 8:29} {...}
{c |}{...}
 {res}   247851   3133067  .0791081
{txt}{ralign 8:30} {...}
{c |}{...}
 {res}    35325   3133067  .0112749
{txt}{ralign 8:31} {...}
{c |}{...}
 {res}    32065   3133067  .0102344
{txt}{ralign 8:32} {...}
{c |}{...}
 {res}   215517   3133067  .0687879
{txt}{ralign 8:42} {...}
{c |}{...}
 {res}     5751   3133067  .0018356
{txt}{ralign 8:43} {...}
{c |}{...}
 {res}    60271   3133067  .0192371
{txt}{ralign 8:62} {...}
{c |}{...}
 {res}     6086   3133067  .0019425
{txt}{hline 9}{c BT}{hline 30}

{com}. 
. // Plot
. foreach v of varlist granted sh_granted tot_granted {c -(}
{txt}  2{com}.         rename `v' `v'_init
{txt}  3{com}. {c )-}
{res}{txt}
{com}. merge 1:1 nace using $tmp/sh_granted_final
{res}
{txt}{col 5}Result{col 38}# of obs.
{col 5}{hline 41}
{col 5}not matched{col 30}{res}               2
{txt}{col 9}from master{col 30}{res}               2{txt}  (_merge==1)
{col 9}from using{col 30}{res}               0{txt}  (_merge==2)

{col 5}matched{col 30}{res}              24{txt}  (_merge==3)
{col 5}{hline 41}

{com}. twoway (scatter sh_granted sh_granted_init) (lfit sh_granted sh_granted_init), ///
>         scheme(lean1) legend(off) ytitle(Industry share of patents - final sample) ///
>         xtitle(Industry share of patents - initial sample)
{res}{txt}
{com}. graph export ../graph/ind_patent_share.eps, replace
{txt}(file ../graph/ind_patent_share.eps written in EPS format)

{com}. 
. reg sh_granted sh_granted_init

{txt}      Source {c |}       SS           df       MS      Number of obs   ={res}        24
{txt}{hline 13}{c +}{hline 34}   F(1, 22)        = {res}  2765.45
{txt}       Model {c |} {res} .132949514         1  .132949514   {txt}Prob > F        ={res}    0.0000
{txt}    Residual {c |} {res} .001057653        22  .000048075   {txt}R-squared       ={res}    0.9921
{txt}{hline 13}{c +}{hline 34}   Adj R-squared   ={res}    0.9917
{txt}       Total {c |} {res} .134007167        23  .005826399   {txt}Root MSE        =   {res} .00693

{txt}{hline 16}{c TT}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{col 1}     sh_granted{col 17}{c |}      Coef.{col 29}   Std. Err.{col 41}      t{col 49}   P>|t|{col 57}     [95% Con{col 70}f. Interval]
{hline 16}{c +}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
sh_granted_init {c |}{col 17}{res}{space 2} 1.020284{col 29}{space 2} .0194016{col 40}{space 1}   52.59{col 49}{space 3}0.000{col 57}{space 4} .9800474{col 70}{space 3}  1.06052
{txt}{space 10}_cons {c |}{col 17}{res}{space 2} .0000507{col 29}{space 2} .0016215{col 40}{space 1}    0.03{col 49}{space 3}0.975{col 57}{space 4}-.0033122{col 70}{space 3} .0034135
{txt}{hline 16}{c BT}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{res}{txt}
{com}. 
. 
. * -----------------------------------------------------------------------------
. * Appendix Figure 9
. * Distribution of firms across countries - Initial vs final sample
. * -----------------------------------------------------------------------------
. // Country share of firms - Final sample
. use $tmp/finsample, clear
{txt}
{com}. gen top10 = (headq=="US" | headq=="JP" | headq=="DE" | headq=="GB" | headq=="FR" ///
>         | headq=="IT" | headq=="CA" | headq=="MX" | headq=="BR" | headq=="ES") 
{txt}
{com}. contract hrm_l2_id headq top10
{txt}
{com}. collapse (count) firms = hrm_l2_id, by(headq top10)
{txt}
{com}. egen tot_firms = total(firms)
{txt}
{com}. gen sh_firms = firms / tot_firms
{txt}
{com}. egen top10_sh_firms = total(sh_firms) if top10 == 1
{txt}(55 missing values generated)

{com}. tabstat firms tot_firms sh_firms top10_sh_firms if top10 == 1, by(headq) nototal

{txt}Summary statistics: mean
  by categories of: headq 

{ralign 5:headq} {...}
{c |}{...}
     firms  tot_fi~s  sh_firms  top10_~s
{hline 6}{c +}{hline 40}
{ralign 5:BR} {...}
{c |}{...}
 {res}       74     41058  .0018023   .815992
{txt}{ralign 5:CA} {...}
{c |}{...}
 {res}     1318     41058  .0321009   .815992
{txt}{ralign 5:DE} {...}
{c |}{...}
 {res}     5216     41058  .1270398   .815992
{txt}{ralign 5:ES} {...}
{c |}{...}
 {res}      476     41058  .0115934   .815992
{txt}{ralign 5:FR} {...}
{c |}{...}
 {res}     3058     41058    .07448   .815992
{txt}{ralign 5:GB} {...}
{c |}{...}
 {res}     2303     41058  .0560914   .815992
{txt}{ralign 5:IT} {...}
{c |}{...}
 {res}     2621     41058  .0638365   .815992
{txt}{ralign 5:JP} {...}
{c |}{...}
 {res}     6810     41058  .1658629   .815992
{txt}{ralign 5:MX} {...}
{c |}{...}
 {res}       29     41058  .0007063   .815992
{txt}{ralign 5:US} {...}
{c |}{...}
 {res}    11598     41058  .2824785   .815992
{txt}{hline 6}{c BT}{hline 40}

{com}. save $tmp/sh_firms_final, replace
{txt}file /tmp/sh_firms_final.dta saved

{com}. 
. // Country share of firms - Initial sample
. use $tmp/initsample, clear
{txt}
{com}. gen top10 = (headq=="US" | headq=="JP" | headq=="DE" | headq=="GB" | headq=="FR" ///
>         | headq=="IT" | headq=="CA" | headq=="MX" | headq=="BR" | headq=="ES") 
{txt}
{com}. contract hrm_l2_id headq top10
{txt}
{com}. collapse (count) firms = hrm_l2_id, by(headq top10)
{txt}
{com}. egen tot_firms = total(firms)
{txt}
{com}. gen sh_firms = firms / tot_firms
{txt}
{com}. egen top10_sh_firms = total(sh_firms) if top10 == 1
{txt}(94 missing values generated)

{com}. tabstat firms tot_firms sh_firms top10_sh_firms if top10 == 1, by(headq) nototal

{txt}Summary statistics: mean
  by categories of: headq 

{ralign 5:headq} {...}
{c |}{...}
     firms  tot_fi~s  sh_firms  top10_~s
{hline 6}{c +}{hline 40}
{ralign 5:BR} {...}
{c |}{...}
 {res}     1228    763581  .0016082  .7594008
{txt}{ralign 5:CA} {...}
{c |}{...}
 {res}    20207    763581  .0264635  .7594008
{txt}{ralign 5:DE} {...}
{c |}{...}
 {res}   130674    763581  .1711331  .7594008
{txt}{ralign 5:ES} {...}
{c |}{...}
 {res}    24152    763581  .0316299  .7594008
{txt}{ralign 5:FR} {...}
{c |}{...}
 {res}    40568    763581  .0531286  .7594008
{txt}{ralign 5:GB} {...}
{c |}{...}
 {res}    33308    763581  .0436208  .7594008
{txt}{ralign 5:IT} {...}
{c |}{...}
 {res}    47014    763581  .0615704  .7594008
{txt}{ralign 5:JP} {...}
{c |}{...}
 {res}    47375    763581  .0620432  .7594008
{txt}{ralign 5:MX} {...}
{c |}{...}
 {res}      611    763581  .0008002  .7594008
{txt}{ralign 5:US} {...}
{c |}{...}
 {res}   234727    763581  .3074029  .7594008
{txt}{hline 6}{c BT}{hline 40}

{com}. 
. // Plot
. foreach v of varlist firms tot_firms sh_firms {c -(}
{txt}  2{com}.         rename `v' `v'_init
{txt}  3{com}. {c )-}
{res}{txt}
{com}. merge 1:1 headq using $tmp/sh_firms_final
{res}
{txt}{col 5}Result{col 38}# of obs.
{col 5}{hline 41}
{col 5}not matched{col 30}{res}              39
{txt}{col 9}from master{col 30}{res}              39{txt}  (_merge==1)
{col 9}from using{col 30}{res}               0{txt}  (_merge==2)

{col 5}matched{col 30}{res}              65{txt}  (_merge==3)
{col 5}{hline 41}

{com}. twoway (scatter sh_firms sh_firms_init) (lfit sh_firms sh_firms_init), ///
>         scheme(lean1) legend(off) ytitle(Country share of firms - final sample) ///
>         xtitle(Country share of firms - initial sample)
{res}{txt}
{com}. graph export ../graph/ctry_firms_share.eps, replace
{txt}(file ../graph/ctry_firms_share.eps written in EPS format)

{com}. 
. reg sh_firms sh_firms_init

{txt}      Source {c |}       SS           df       MS      Number of obs   ={res}        65
{txt}{hline 13}{c +}{hline 34}   F(1, 63)        = {res}   388.93
{txt}       Model {c |} {res} .107175933         1  .107175933   {txt}Prob > F        ={res}    0.0000
{txt}    Residual {c |} {res} .017360566        63  .000275565   {txt}R-squared       ={res}    0.8606
{txt}{hline 13}{c +}{hline 34}   Adj R-squared   ={res}    0.8584
{txt}       Total {c |} {res} .124536499        64  .001945883   {txt}Root MSE        =   {res}  .0166

{txt}{hline 14}{c TT}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{col 1}     sh_firms{col 15}{c |}      Coef.{col 27}   Std. Err.{col 39}      t{col 47}   P>|t|{col 55}     [95% Con{col 68}f. Interval]
{hline 14}{c +}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
sh_firms_init {c |}{col 15}{res}{space 2}  .909801{col 27}{space 2} .0461328{col 38}{space 1}   19.72{col 47}{space 3}0.000{col 55}{space 4}  .817612{col 68}{space 3}  1.00199
{txt}{space 8}_cons {c |}{col 15}{res}{space 2} .0013968{col 27}{space 2} .0021777{col 38}{space 1}    0.64{col 47}{space 3}0.524{col 55}{space 4} -.002955{col 68}{space 3} .0057487
{txt}{hline 14}{c BT}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{res}{txt}
{com}. 
. 
. * -----------------------------------------------------------------------------
. * Appendix Figure 11
. * Distribution of firms across industries - Initial vs final sample
. * -----------------------------------------------------------------------------
. // Industry share of firms - Final sample
. use $tmp/finsample, clear
{txt}
{com}. gen nace = int(nace2_1) // 2-digit nace
{txt}
{com}. contract hrm_l2_id nace
{txt}
{com}. collapse (count) firms = hrm_l2_id, by(nace)
{txt}
{com}. egen tot_firms = total(firms)
{txt}
{com}. gen sh_firms = firms / tot_firms
{txt}
{com}. tabstat firms tot_firms sh_firms, by(nace) nototal

{txt}Summary statistics: mean
  by categories of: nace 

{ralign 8:nace} {...}
{c |}{...}
     firms  tot_fi~s  sh_firms
{hline 9}{c +}{hline 30}
{ralign 8:10} {...}
{c |}{...}
 {res}      648     41058  .0157826
{txt}{ralign 8:11} {...}
{c |}{...}
 {res}       60     41058  .0014613
{txt}{ralign 8:12} {...}
{c |}{...}
 {res}       39     41058  .0009499
{txt}{ralign 8:13} {...}
{c |}{...}
 {res}      194     41058   .004725
{txt}{ralign 8:14} {...}
{c |}{...}
 {res}      143     41058  .0034829
{txt}{ralign 8:15} {...}
{c |}{...}
 {res}      167     41058  .0040674
{txt}{ralign 8:16} {...}
{c |}{...}
 {res}       58     41058  .0014126
{txt}{ralign 8:17} {...}
{c |}{...}
 {res}      154     41058  .0037508
{txt}{ralign 8:18} {...}
{c |}{...}
 {res}      121     41058  .0029471
{txt}{ralign 8:19} {...}
{c |}{...}
 {res}      136     41058  .0033124
{txt}{ralign 8:20} {...}
{c |}{...}
 {res}     3404     41058  .0829071
{txt}{ralign 8:21} {...}
{c |}{...}
 {res}     2172     41058  .0529008
{txt}{ralign 8:22} {...}
{c |}{...}
 {res}     1688     41058  .0411126
{txt}{ralign 8:23} {...}
{c |}{...}
 {res}      784     41058  .0190949
{txt}{ralign 8:24} {...}
{c |}{...}
 {res}      690     41058  .0168055
{txt}{ralign 8:25} {...}
{c |}{...}
 {res}     1524     41058  .0371182
{txt}{ralign 8:26} {...}
{c |}{...}
 {res}     5989     41058  .1458668
{txt}{ralign 8:27} {...}
{c |}{...}
 {res}     2923     41058   .071192
{txt}{ralign 8:28} {...}
{c |}{...}
 {res}    11614     41058  .2828681
{txt}{ralign 8:29} {...}
{c |}{...}
 {res}     1970     41058  .0479809
{txt}{ralign 8:30} {...}
{c |}{...}
 {res}      974     41058  .0237225
{txt}{ralign 8:31} {...}
{c |}{...}
 {res}      664     41058  .0161722
{txt}{ralign 8:32} {...}
{c |}{...}
 {res}     4935     41058  .1201958
{txt}{ralign 8:62} {...}
{c |}{...}
 {res}        7     41058  .0001705
{txt}{hline 9}{c BT}{hline 30}

{com}. save $tmp/sh_firms_final, replace
{txt}file /tmp/sh_firms_final.dta saved

{com}. 
. // Industry share of firms - Initial sample
. use $tmp/initsample, clear
{txt}
{com}. gen nace = int(nace2_1) // 2-digit nace
{txt}
{com}. contract hrm_l2_id nace
{txt}
{com}. collapse (count) firms = hrm_l2_id, by(nace)
{txt}
{com}. egen tot_firms = total(firms)
{txt}
{com}. gen sh_firms = firms / tot_firms
{txt}
{com}. tabstat firms tot_firms sh_firms, by(nace) nototal

{txt}Summary statistics: mean
  by categories of: nace 

{ralign 8:nace} {...}
{c |}{...}
     firms  tot_fi~s  sh_firms
{hline 9}{c +}{hline 30}
{ralign 8:10} {...}
{c |}{...}
 {res}    11340    763581  .0148511
{txt}{ralign 8:11} {...}
{c |}{...}
 {res}     1043    763581  .0013659
{txt}{ralign 8:12} {...}
{c |}{...}
 {res}     1249    763581  .0016357
{txt}{ralign 8:13} {...}
{c |}{...}
 {res}     2321    763581  .0030396
{txt}{ralign 8:14} {...}
{c |}{...}
 {res}     4393    763581  .0057532
{txt}{ralign 8:15} {...}
{c |}{...}
 {res}     3793    763581  .0049674
{txt}{ralign 8:16} {...}
{c |}{...}
 {res}      766    763581  .0010032
{txt}{ralign 8:17} {...}
{c |}{...}
 {res}     2929    763581  .0038359
{txt}{ralign 8:18} {...}
{c |}{...}
 {res}     3195    763581  .0041842
{txt}{ralign 8:19} {...}
{c |}{...}
 {res}     1477    763581  .0019343
{txt}{ralign 8:20} {...}
{c |}{...}
 {res}    44489    763581  .0582636
{txt}{ralign 8:21} {...}
{c |}{...}
 {res}    45136    763581   .059111
{txt}{ralign 8:22} {...}
{c |}{...}
 {res}    23807    763581  .0311781
{txt}{ralign 8:23} {...}
{c |}{...}
 {res}    10695    763581  .0140064
{txt}{ralign 8:24} {...}
{c |}{...}
 {res}     6004    763581   .007863
{txt}{ralign 8:25} {...}
{c |}{...}
 {res}    26539    763581   .034756
{txt}{ralign 8:26} {...}
{c |}{...}
 {res}   115664    763581  .1514757
{txt}{ralign 8:27} {...}
{c |}{...}
 {res}    56999    763581   .074647
{txt}{ralign 8:28} {...}
{c |}{...}
 {res}   193088    763581  .2528717
{txt}{ralign 8:29} {...}
{c |}{...}
 {res}    31548    763581  .0413159
{txt}{ralign 8:30} {...}
{c |}{...}
 {res}    16396    763581  .0214725
{txt}{ralign 8:31} {...}
{c |}{...}
 {res}    17119    763581  .0224194
{txt}{ralign 8:32} {...}
{c |}{...}
 {res}   109132    763581  .1429213
{txt}{ralign 8:42} {...}
{c |}{...}
 {res}     3198    763581  .0041882
{txt}{ralign 8:43} {...}
{c |}{...}
 {res}    26464    763581  .0346578
{txt}{ralign 8:62} {...}
{c |}{...}
 {res}     4797    763581  .0062822
{txt}{hline 9}{c BT}{hline 30}

{com}. 
. // Plot
. foreach v of varlist firms tot_firms sh_firms {c -(}
{txt}  2{com}.         rename `v' `v'_init
{txt}  3{com}. {c )-}
{res}{txt}
{com}. merge 1:1 nace using $tmp/sh_firms_final
{res}
{txt}{col 5}Result{col 38}# of obs.
{col 5}{hline 41}
{col 5}not matched{col 30}{res}               2
{txt}{col 9}from master{col 30}{res}               2{txt}  (_merge==1)
{col 9}from using{col 30}{res}               0{txt}  (_merge==2)

{col 5}matched{col 30}{res}              24{txt}  (_merge==3)
{col 5}{hline 41}

{com}. twoway (scatter sh_firms sh_firms_init) (lfit sh_firms sh_firms_init), ///
>         scheme(lean1) legend(off) ytitle(Industry share of firms - final sample) ///
>         xtitle(Industry share of firms - initial sample)
{res}{txt}
{com}. graph export ../graph/ind_firms_share.eps, replace
{txt}(file ../graph/ind_firms_share.eps written in EPS format)

{com}. 
. reg sh_firms sh_firms_init

{txt}      Source {c |}       SS           df       MS      Number of obs   ={res}        24
{txt}{hline 13}{c +}{hline 34}   F(1, 22)        = {res}   913.43
{txt}       Model {c |} {res} .093735042         1  .093735042   {txt}Prob > F        ={res}    0.0000
{txt}    Residual {c |} {res} .002257615        22  .000102619   {txt}R-squared       ={res}    0.9765
{txt}{hline 13}{c +}{hline 34}   Adj R-squared   ={res}    0.9754
{txt}       Total {c |} {res} .095992657        23  .004173594   {txt}Root MSE        =   {res} .01013

{txt}{hline 14}{c TT}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{col 1}     sh_firms{col 15}{c |}      Coef.{col 27}   Std. Err.{col 39}      t{col 47}   P>|t|{col 55}     [95% Con{col 68}f. Interval]
{hline 14}{c +}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
sh_firms_init {c |}{col 15}{res}{space 2} 1.039412{col 27}{space 2} .0343915{col 38}{space 1}   30.22{col 47}{space 3}0.000{col 55}{space 4}  .968089{col 68}{space 3} 1.110736
{txt}{space 8}_cons {c |}{col 15}{res}{space 2} .0000402{col 27}{space 2} .0024845{col 38}{space 1}    0.02{col 47}{space 3}0.987{col 55}{space 4}-.0051124{col 68}{space 3} .0051927
{txt}{hline 14}{c BT}{hline 11}{hline 11}{hline 9}{hline 8}{hline 13}{hline 12}
{res}{txt}
{com}. 
. log close
      {txt}name:  {res}<unnamed>
       {txt}log:  {res}/Users/fcoell/Dropbox/PATSTAT/logs/../logs/Appendix_firms_patents_distr.smcl
  {txt}log type:  {res}smcl
 {txt}closed on:  {res}15 Apr 2020, 16:25:54
{txt}{.-}
{smcl}
{txt}{sf}{ul off}